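# Paths below are the ones used by the load() / file = / pdf() calls in this script.
# Input:    ../data/finalData_70kGrped.RData
#           ../../../../Data/Hearings/topic_models_100.RData
# Output:   ../output/tables/main.tex
#           ../output/tables/main_SI.tex
#           ../output/tables/lda_robust.tex
#           ../output/figures/sensitivity_dyad.pdf
#           ../output/figures/SI_vimp.pdf
# Author:   JB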


rm(list = ls())
require(lme4)
require(lfe)
require(tidyverse)
require(ggridges)
require(fixest)

# source('./helper_functions.R')
# load('../data/finalData.RData')
load('../data/finalData_70kGrped.RData')

# Add a one-row lag of every column matching 'topic70.*_\d'; each new column
# is named '<column>_lag'.
#
# Rows are sorted by hearing (docID) and then by fullInd (presumably the
# utterance position within the hearing -- confirm), and the lag is taken
# WITHIN docID. Without the grouping, the first utterance of each hearing would
# inherit the last utterance of the previous hearing instead of getting NA.
# dplyr::lag is named explicitly because stats::lag() silently returns a plain
# vector unshifted if it ever ends up masking dplyr's version.
# NOTE(review): ungroup() also drops any grouping the loaded data carried.
utterance_level <- utterance_level %>%
  # select(docID,opensecretsID,fullInd,topic70Grped_1) %>%
  arrange(docID,fullInd) %>%
  group_by(docID) %>%
  mutate_at(vars(matches('topic70.*_\\d')),list(lag = ~dplyr::lag(.))) %>%
  ungroup()

# Rigorous fixed effects
# Analysis data for the dyad models:
#   * missing values in every column whose name matches 'lag' are set to 0;
#   * infinite values of votepct_rel are set to 1;
#   * respondingTo becomes a factor with FEDBERNANKE as the reference level.
dyadToAnal <- utterance_level %>%
  mutate_at(vars(matches('lag')),~ifelse(is.na(.),0,.)) %>%
  mutate(respondingTo = relevel(factor(respondingTo),ref = 'FEDBERNANKE'),
         votepct_rel = ifelse(is.infinite(votepct_rel),1,votepct_rel))

# Names of the SENT_ columns, excluding lagged/error variants and the
# SEVERE / AUTHOR / LIKELY ones. The models append '_lag' to these names.
dims <- names(select(utterance_level,
                     matches('SENT_'),
                     -matches('_lag|error'),
                     -matches('SEVERE|AUTHOR|LIKELY')))


# ---- Main table: ../output/tables/main.tex ---------------------------------
# OLS models (fixest::feols) of `interruptor` on the Fed chair being responded
# to (`respondingTo`, reference level FEDBERNANKE), with progressively richer
# controls and fixed effects. Standard errors are clustered on opensecretsID
# and respondingTo in every model.
#   modDyad1  chair indicators only
#   modDyad2  + speaker covariates and a cubic in log(tot_utterances)
#   modDyad3  + lagged topics, lagged tone scores, utterance length, interrupted
#   modDyad4  modDyad3 + hearing (docID) fixed effects
#   modDyad5  speaker (opensecretsID) + hearing fixed effects, no speaker covariates
#   modDyad6  modDyad5 on the sample without yellen_vote == 1

# Estimation samples. filter(yellen_vote != 1) also drops rows where
# yellen_vote is NA, exactly as the combined filter did.
mainSample <- dyadToAnal %>% filter(all > 30,ind > mind)
mainSampleNoVoters <- mainSample %>% filter(yellen_vote != 1)

# Right-hand-side building blocks, kept in the order the terms enter the models.
# dims[!grepl()] is used instead of dims[-which(grepl())]: the latter returns
# an EMPTY vector when nothing matches, which would silently drop every tone term.
toneDims <- dims[!grepl('comb',dims)]
topicTerms <- paste0('topic_',1:100,'_lag')
toneTerms <- c(paste0('scale(',toneDims,'_lag)'),
               'scale(SENT_combAttack_lag)','scale(SENT_combIncoh_lag)','scale(SENT_combToxic_lag)')
speakerTerms <- c('scale(age)','scale(votepct)','scale(nominate_dim1)','GOP','gender',
                  'scale(seniority)','scale(anyBill)','yellen_vote','chamber')
lengthTerms <- c('poly(scale(log(nchars_lag+1)),3)','poly(scale(log(tot_utterances)),3)','interrupted')

# Formula strings for the four distinct specifications.
fmlSpeaker <- paste('interruptor ~',
                    paste(c('respondingTo','poly(scale(log(tot_utterances)),3)',speakerTerms),collapse = ' + '))
fmlUtterance <- paste('interruptor ~',
                      paste(c('respondingTo',topicTerms,toneTerms,speakerTerms,lengthTerms),collapse = ' + '))
fmlHearingFE <- paste(fmlUtterance,'| docID')
fmlSpeakerFE <- paste('interruptor ~',
                      paste(c('respondingTo',topicTerms,toneTerms,lengthTerms),collapse = ' + '),
                      '| opensecretsID + docID')

# Quick look at the raw chair differences (not stored, not in the table).
summary(feols(interruptor ~ factor(respondingTo),
              mainSample,
              cluster = 'opensecretsID + respondingTo'))

modDyad1 <- feols(interruptor ~ respondingTo,
                  mainSample,
                  cluster = 'opensecretsID + respondingTo')
summary(modDyad1)

modDyad2 <- feols(as.formula(fmlSpeaker),
                  mainSample,
                  cluster = 'opensecretsID + respondingTo')
summary(modDyad2)

modDyad3 <- feols(as.formula(fmlUtterance),
                  mainSample,
                  cluster = 'opensecretsID + respondingTo')
summary(modDyad3)

modDyad4 <- feols(as.formula(fmlHearingFE),
                  mainSample,
                  cluster = 'opensecretsID + respondingTo')
summary(modDyad4)

modDyad5 <- feols(as.formula(fmlSpeakerFE),
                  mainSample,
                  cluster = 'opensecretsID + respondingTo')
summary(modDyad5)

# Estimated once; the script previously fitted this identical model twice.
modDyad6 <- feols(as.formula(fmlSpeakerFE),
                  mainSampleNoVoters,
                  cluster = 'opensecretsID + respondingTo')
summary(modDyad6)


# Coefficient / fixed-effect names -> labels printed in the LaTeX table.
dict <- c('respondingToFEDYELLEN' = 'Yellen (ref. Bernanke)','respondingToFEDPOWELL' = 'Powell (ref. Bernanke)',
          'respondingToFEDGREENSPAN' = 'Greenspan (ref. Bernanke)','scale(age)' = 'Age (scaled)',
          'scale(votepct)' = 'Vote Share (scaled)','scale(nominate_dim1)' = 'Ideology (scaled)',
          'GOP' = 'Republican (ref. Democrat)','chamberSenate' = 'Senate (ref. House)',
          'genderM' = 'Male (ref. Female)','scale(seniority)' = 'Seniority (scaled)','scale(anyBill)' = 'Fed Oversight Sponsor',
          'yellen_vote' = 'Oppose Yellen Conf.','interrupted' = 'Interrupted','docID' = 'Hearing','opensecretsID' = 'Speaker')

# `keep` is matched against the relabelled names, so only the chair, speaker
# and `interrupted` rows are shown; topic and tone controls are summarised by
# the two extra lines instead.
etable(modDyad1,modDyad2,modDyad3,modDyad4,modDyad5,modDyad6,
       keep = 'ref. |scaled|Oversight|Oppose|Interrupted',
       order = c('^Yellen','Powell','Greenspan','Age','Vote','Ideology','Republican','Senate','Male','Seniority','Oversight','Oppose','Interr'),
       dict = dict,extralines = list('100 LDA Topics' = c('No','No','Yes','Yes','Yes','Yes'),
                                     'Tone Probabilities' = c('No','No','Yes','Yes','Yes','Yes')),
       depvar = FALSE,digits = 3,digits.stats = 3,signif.code = c('***' = .001,'**' = .01,'*' = .05,'\\dag' = .1),replace = TRUE,
       headers = list(c('','Controls:','Controls:','FEs:','FEs:','Dropping'),
                      c('Vanilla','Speaker','Utterance','Hearing','Speaker','Voters')),
       file = '../output/tables/main.tex')


# Robustness and sensitivity analysis
# ---- SI table: ../output/tables/main_SI.tex --------------------------------
# Same six specifications as the main table, re-estimated with the looser
# sample restriction all > 0 (instead of all > 30).
# NOTE: this overwrites modDyad1 ... modDyad6.

# Estimation samples. filter(yellen_vote != 1) also drops rows where
# yellen_vote is NA, exactly as the combined filter did.
siSample <- dyadToAnal %>% filter(all > 0,ind > mind)
siSampleNoVoters <- siSample %>% filter(yellen_vote != 1)

# Right-hand-side building blocks, kept in the order the terms enter the models.
# dims[!grepl()] is used instead of dims[-which(grepl())]: the latter returns
# an EMPTY vector when nothing matches, which would silently drop every tone term.
toneDims <- dims[!grepl('comb',dims)]
topicTerms <- paste0('topic_',1:100,'_lag')
toneTerms <- c(paste0('scale(',toneDims,'_lag)'),
               'scale(SENT_combAttack_lag)','scale(SENT_combIncoh_lag)','scale(SENT_combToxic_lag)')
speakerTerms <- c('scale(age)','scale(votepct)','scale(nominate_dim1)','GOP','gender',
                  'scale(seniority)','scale(anyBill)','yellen_vote','chamber')
lengthTerms <- c('poly(scale(log(nchars_lag+1)),3)','poly(scale(log(tot_utterances)),3)','interrupted')

# Formula strings for the four distinct specifications.
fmlSpeaker <- paste('interruptor ~',
                    paste(c('respondingTo','poly(scale(log(tot_utterances)),3)',speakerTerms),collapse = ' + '))
fmlUtterance <- paste('interruptor ~',
                      paste(c('respondingTo',topicTerms,toneTerms,speakerTerms,lengthTerms),collapse = ' + '))
fmlHearingFE <- paste(fmlUtterance,'| docID')
fmlSpeakerFE <- paste('interruptor ~',
                      paste(c('respondingTo',topicTerms,toneTerms,lengthTerms),collapse = ' + '),
                      '| opensecretsID + docID')

modDyad1 <- feols(interruptor ~ respondingTo,
                  siSample,
                  cluster = 'opensecretsID + respondingTo')
summary(modDyad1)

modDyad2 <- feols(as.formula(fmlSpeaker),
                  siSample,
                  cluster = 'opensecretsID + respondingTo')
summary(modDyad2)

modDyad3 <- feols(as.formula(fmlUtterance),
                  siSample,
                  cluster = 'opensecretsID + respondingTo')
summary(modDyad3)

modDyad4 <- feols(as.formula(fmlHearingFE),
                  siSample,
                  cluster = 'opensecretsID + respondingTo')
summary(modDyad4)

modDyad5 <- feols(as.formula(fmlSpeakerFE),
                  siSample,
                  cluster = 'opensecretsID + respondingTo')
summary(modDyad5)

modDyad6 <- feols(as.formula(fmlSpeakerFE),
                  siSampleNoVoters,
                  cluster = 'opensecretsID + respondingTo')
summary(modDyad6)

# Coefficient / fixed-effect names -> labels printed in the LaTeX table.
dict <- c('respondingToFEDYELLEN' = 'Yellen (ref. Bernanke)','respondingToFEDPOWELL' = 'Powell (ref. Bernanke)',
          'respondingToFEDGREENSPAN' = 'Greenspan (ref. Bernanke)','scale(age)' = 'Age (scaled)',
          'scale(votepct)' = 'Vote Share (scaled)','scale(nominate_dim1)' = 'Ideology (scaled)',
          'GOP' = 'Republican (ref. Democrat)','chamberSenate' = 'Senate (ref. House)',
          'genderM' = 'Male (ref. Female)','scale(seniority)' = 'Seniority (scaled)','scale(anyBill)' = 'Fed Oversight Sponsor',
          'yellen_vote' = 'Oppose Yellen Conf.','interrupted' = 'Interrupted','docID' = 'Hearing','opensecretsID' = 'Speaker')

etable(modDyad1,modDyad2,modDyad3,modDyad4,modDyad5,modDyad6,
       keep = 'ref. |scaled|Oversight|Oppose|Interrupted',
       order = c('^Yellen','Powell','Greenspan','Age','Vote','Ideology','Republican','Senate','Male','Seniority','Oversight','Oppose','Interr'),
       dict = dict,extralines = list('100 LDA Topics' = c('No','No','Yes','Yes','Yes','Yes'),
                                     'Tone Probabilities' = c('No','No','Yes','Yes','Yes','Yes')),
       depvar = FALSE,digits = 3,digits.stats = 3,signif.code = c('***' = .001,'**' = .01,'*' = .05,'\\dag' = .1),replace = TRUE,
       headers = list(c('','Controls:','Controls:','FEs:','FEs:','Dropping'),
                      c('Vanilla','Speaker','Utterance','Hearing','Speaker','Voters')),file = '../output/tables/main_SI.tex')


# Sensitivity
# library() rather than require(): a missing package should stop the script
# here, not surface later as "could not find function".
library(sensemakr)

# Sample for the sensitivity analysis: adds a 0/1 `yellen` treatment indicator
# (respondingTo contains 'YELLEN') and drops rows whose opensecretsID contains 'FED'.
sensSample <- dyadToAnal %>%
  mutate(yellen = ifelse(grepl('YELLEN',respondingTo),1,0)) %>%
  filter(all > 0,ind > mind,!grepl('FED',opensecretsID))

# Plain lm() version of the two-way fixed-effects specification, with speaker
# and hearing effects entered as factor dummies. The combined tone scores
# (SENT_comb*_lag) are not part of this specification.
# dims[!grepl()] replaces dims[-which(grepl())], which returns an empty vector
# when nothing matches.
sensTerms <- c('yellen',
               paste0('topic_',1:100,'_lag'),
               paste0('scale(',dims[!grepl('comb',dims)],'_lag)'),
               'poly(scale(log(nchars_lag+1)),3)','poly(scale(log(tot_utterances)),3)','interrupted',
               'factor(opensecretsID)','factor(docID)')

# NOTE: this overwrites the feols object `modDyad5` created for the tables.
modDyad5 <- lm(as.formula(paste('interruptor ~',paste(sensTerms,collapse = ' + '))),
               sensSample)
summary(modDyad5)

# Not wrapped in try(): a failure here previously only resurfaced as an obscure
# plot() error after the PDF device had already been opened.
sensResTmp <- sensemakr(model = modDyad5,
                        treatment = "yellen",
                        benchmark_covariates = 'interrupted',
                        kd = c(12,24),
                        ky = c(12,24),
                        q = 1,
                        alpha = 0.05,
                        reduce = TRUE)

pdf('../output/figures/sensitivity_dyad.pdf',width = 7,height = 7)
# finally = dev.off() closes the device even if plotting fails, so no dangling
# graphics device is left behind.
tryCatch({
  plot(sensResTmp,label.bump.y = 0,cex.lab = 1,cex.label.text = .6,round = 2)
  # Reference line through the origin with slope r2yz.dx / r2dz.x.
  # NOTE(review): bounds presumably has one row per (kd, ky) pair, so this
  # ratio has length 2 -- confirm which value abline() actually draws.
  abline(0,sensResTmp$bounds$r2yz.dx/sensResTmp$bounds$r2dz.x,col = 'grey60')
},finally = dev.off())


# JOP RR1: alternative topic models
# Speaker + hearing fixed-effects specification re-estimated with three
# different sets of lagged topic controls:
#   modDyadOG      topic_1 ... topic_100
#   modDyadGrp70   topic70Grped_1 ... topic70Grped_70
#   modDyadSpkr70  topic70Spkr_1 ... topic70Spkr_70
# All three use the sample all > 30, ind > mind, yellen_vote != 1.
rrSample <- dyadToAnal %>%
  filter(all > 30,ind > mind,
         yellen_vote != 1)

# Controls shared by the three models. dims[!grepl()] replaces
# dims[-which(grepl())], which returns an empty vector when nothing matches.
rrControls <- c(paste0('scale(',dims[!grepl('comb',dims)],'_lag)'),
                'scale(SENT_combAttack_lag)','scale(SENT_combIncoh_lag)','scale(SENT_combToxic_lag)',
                'poly(scale(log(nchars_lag+1)),3)','poly(scale(log(tot_utterances)),3)','interrupted')

# Returns the model formula as a string for a given vector of topic terms.
rrFmlString <- function(topicCols) {
  paste('interruptor ~',
        paste(c('respondingTo',topicCols,rrControls),collapse = ' + '),
        '| opensecretsID + docID')
}

modDyadOG <- feols(as.formula(rrFmlString(paste0('topic_',1:100,'_lag'))),
                   rrSample,
                   cluster = 'opensecretsID + respondingTo')
summary(modDyadOG)

modDyadGrp70 <- feols(as.formula(rrFmlString(paste0('topic70Grped_',1:70,'_lag'))),
                      rrSample,
                      cluster = 'opensecretsID + respondingTo')
summary(modDyadGrp70)

modDyadSpkr70 <- feols(as.formula(rrFmlString(paste0('topic70Spkr_',1:70,'_lag'))),
                       rrSample,
                       cluster = 'opensecretsID + respondingTo')
summary(modDyadSpkr70)


# Substantive topics: JOPRR1
# Brings `lda_model` into the workspace (used here and for the top words of
# the prognostic-topic figure).
load('../../../../Data/Hearings/topic_models_100.RData')

# Print the 10 top words of every topic, one topic per row, for inspection.
lda_model$get_top_words(n = 10) %>%
  data.frame() %>%
  rename_all(function(x) gsub('X','topic_',x)) %>%
  mutate(top_word = row_number()) %>%
  as_tibble() %>%
  t()

# The 40 topics used as the "substantive" control set (presumably hand-picked
# from the top-word listing above -- confirm).
substantive <- paste0('topic_',c(1,4,6,7,8,10,12,14,18,20,24,26,32,34,38,41,42,43,46,49,
                                 50,52,53,55,58,61,62,63,65,68,72,77,80,83,85,89,90,95,96,100))

# Speaker + hearing fixed-effects specification with only the substantive
# topics as lagged topic controls. dims[!grepl()] replaces
# dims[-which(grepl())], which returns an empty vector when nothing matches.
subsTerms <- c('respondingTo',
               paste0(substantive,'_lag'),
               paste0('scale(',dims[!grepl('comb',dims)],'_lag)'),
               'scale(SENT_combAttack_lag)','scale(SENT_combIncoh_lag)','scale(SENT_combToxic_lag)',
               'poly(scale(log(nchars_lag+1)),3)','poly(scale(log(tot_utterances)),3)','interrupted')

modDyadSubs <- feols(as.formula(paste('interruptor ~',
                                      paste(subsTerms,collapse = ' + '),
                                      '| opensecretsID + docID')),
                     dyadToAnal %>%
                       filter(all > 30,ind > mind,
                              yellen_vote != 1),
                     cluster = 'opensecretsID + respondingTo')
summary(modDyadSubs)




# Prognostic topics
# library() rather than require(): a missing package should stop the script here.
library(ranger)

# Outcome (`interrupted`, as a factor so ranger fits a classification forest)
# plus every column whose name ends in topic_<number>.
forVimp <- dyadToAnal %>%
  select(interrupted,matches('topic_\\d+$')) %>%
  mutate(interrupted = factor(interrupted))

# Random forest with permutation importance. The seed is fixed so that the
# importance ranking -- and therefore the 20 topics selected into `ctrls` and
# any model built on them -- is the same on every run. The value itself is
# arbitrary.
rangMod <- ranger(interrupted ~ .,forVimp,importance = 'permutation',seed = 1234)

# Confusion table of the forest's stored predictions against the outcome.
forVimp$preds <- rangMod$predictions
table(forVimp$preds,forVimp$interrupted)

# Top 50 words per topic, one column per topic (topic_1, topic_2, ...).
topWord <- lda_model$get_top_words(n = 50) %>%
  data.frame() %>%
  rename_all(function(x) gsub('X','topic_',x)) %>%
  mutate(top_word = row_number()) %>%
  as_tibble()

# Label for each topic: its first three top words, comma separated.
topTerms <- topWord %>%
  gather(topic,term) %>%
  group_by(topic) %>%
  slice(1:3) %>%
  summarise(terms = paste(term,collapse = ', '))

# Looking at which topics are interrupted the most
# Importance is expressed relative to the forest's prediction error; the join
# key is spelled out instead of relying on the implicit natural join.
toplot <- data.frame(topic = names(rangMod$variable.importance),
           vimp = rangMod$variable.importance / rangMod$prediction.error) %>%
  left_join(topTerms,by = 'topic') %>%
  as_tibble()

vimpPlot <- toplot %>%
  ggplot(aes(x = vimp,y = reorder(terms,vimp))) +
  geom_bar(stat = 'identity') +
  labs(x = 'Variable Importance (% reduction in error)',
       y = 'Topic',
       title = 'Variable Importance',
       subtitle = 'Topics most effective at predicting interruption')

pdf('../output/figures/SI_vimp.pdf',width =7,height= 9)
# Explicit print(): ggplot objects are only drawn by auto-printing, which does
# not happen when the script is run through source(), leaving an empty PDF.
print(vimpPlot)
dev.off()

# The 20 topics with the highest importance.
ctrls <- toplot %>%
  arrange(desc(vimp)) %>%
  slice(1:20) %>%
  pull(topic)


# Speaker + hearing fixed-effects specification with only the 20 topics in
# `ctrls` as lagged topic controls. dims[!grepl()] replaces
# dims[-which(grepl())], which returns an empty vector when nothing matches.
vimpTerms <- c('respondingTo',
               paste0(ctrls,'_lag'),
               paste0('scale(',dims[!grepl('comb',dims)],'_lag)'),
               'scale(SENT_combAttack_lag)','scale(SENT_combIncoh_lag)','scale(SENT_combToxic_lag)',
               'poly(scale(log(nchars_lag+1)),3)','poly(scale(log(tot_utterances)),3)','interrupted')

modDyadVIMP <- feols(as.formula(paste('interruptor ~',
                                      paste(vimpTerms,collapse = ' + '),
                                      '| opensecretsID + docID')),
                     dyadToAnal %>%
                       filter(all > 30,ind > mind,
                              yellen_vote != 1),
                     cluster = 'opensecretsID + respondingTo')
summary(modDyadVIMP)

# Coefficient / fixed-effect names -> labels printed in the LaTeX table.
dict <- c('respondingToFEDYELLEN' = 'Yellen (ref. Bernanke)','respondingToFEDPOWELL' = 'Powell (ref. Bernanke)',
          'respondingToFEDGREENSPAN' = 'Greenspan (ref. Bernanke)','scale(age)' = 'Age (scaled)',
          'scale(votepct)' = 'Vote Share (scaled)','scale(nominate_dim1)' = 'Ideology (scaled)',
          'GOP' = 'Republican (ref. Democrat)','chamberSenate' = 'Senate (ref. House)',
          'genderM' = 'Male (ref. Female)','scale(seniority)' = 'Seniority (scaled)','scale(anyBill)' = 'Fed Oversight Sponsor',
          'yellen_vote' = 'Oppose Yellen Conf.','interrupted' = 'Interrupted','docID' = 'Hearing','opensecretsID' = 'Speaker')


# One column per topic-control set; the column headers say which set is used.
etable(modDyadOG,modDyadGrp70,modDyadSpkr70,modDyadSubs,modDyadVIMP,
       keep = 'ref. |scaled|Oversight|Oppose|Interrupted',
       order = c('^Yellen','Powell','Greenspan','Age','Vote','Ideology','Republican','Senate','Male','Seniority','Oversight','Oppose','Interr'),
       dict = dict,extralines = list('LDA Topics (see columns)' = c('Yes','Yes','Yes','Yes','Yes'),
                                     'Tone Probabilities' = c('Yes','Yes','Yes','Yes','Yes')),
       depvar = FALSE,digits = 3,digits.stats = 3,signif.code = c('***' = .001,'**' = .01,'*' = .05,'\\dag' = .1),replace = TRUE,
       headers = list(c('Per Utterance','Per chunk','Per speaker','Substantive','Prognostic'),
                      c('(100 Topics)','(70 Topics)','(70 Topics)','(40 topics)','(20 topics)')),
       file = '../output/tables/lda_robust.tex')
